#### パッケージの読み込み ####
require(ebal)

#### データの読み込み ####
young.data.raw <- read.csv("young_data.csv")
adult.data.raw <- read.csv("adult_data.csv")

## 不注意回答者の除外
young.data <- subset(young.data.raw, ! ((young.data.raw$age < 18 & young.data.raw$education > 3) | 
                                          (young.data.raw$age < 21 & young.data.raw$education == 7)))
adult.data <- subset(adult.data.raw, check.1 > 4 & check.2 < 4)

#### 若年層調査データの事後層化ウェイトの作成 ####
## デモグラフィック変数の再コーディング
young.data$age.group <- young.data$age - 14
young.data$education.group <- ifelse(young.data$education < 3, young.data$education, 
                                     ifelse(young.data$education < 6, 3, 4))
young.data$region <- ifelse(young.data$prefecture < 8, 1, 
                            ifelse(young.data$prefecture < 15, 2, 
                                   ifelse(young.data$prefecture < 25, 3, 
                                          ifelse(young.data$prefecture < 31, 4, 
                                                 ifelse(young.data$prefecture < 36, 5, 
                                                        ifelse(young.data$prefecture < 40, 6, 7))))))

## 母集団の周辺分布
young.data.population.gender <- c(5633, 5329) / sum(c(5633, 5329))
young.data.population.age <- c(1174, 1201, 1203, 1229, 1233, 1226, 1242, 1238) / 
  sum(c(1174, 1201, 1203, 1229, 1233, 1226, 1242, 1238))
young.data.population.education <- c(814999.4, 4681049.2, 1189431.8, 2777141) / 
  sum(c(814999.4, 4681049.2, 1189431.8, 2777141))
young.data.population.region <- c(1019, 3356, 1777, 1661, 556, 265, 1098) / 
  sum(c(1019, 3356, 1777, 1661, 556, 265, 1098))

## エントロピーバランシング
young.data.ebalance.result <- ebalance(c(rep(0, nrow(young.data)), rep(1, nrow(young.data))), 
                                       rbind(cbind(young.data$gender, diag(8)[young.data$age.group, ][, -1],
                                                   diag(4)[young.data$education.group, ][, -1], 
                                                   diag(7)[young.data$region, ][, -1]),
                                             tcrossprod(rep(1, nrow(young.data)),
                                                        c(young.data.population.gender[-1], young.data.population.age[-1], 
                                                          young.data.population.education[-1], young.data.population.region[-1]))))
young.data$weight <- young.data.ebalance.result$w

#### 全年齢調査データの事後層化ウェイトの作成 ####
## デモグラフィック変数の再コーディング
adult.data$age.group <- ifelse(adult.data$age < 25, 1, 
                             ifelse(adult.data$age < 30, 2, 
                                    ifelse(adult.data$age < 35, 3, 
                                           ifelse(adult.data$age < 40, 4, 
                                                  ifelse(adult.data$age < 45, 5, 
                                                         ifelse(adult.data$age < 50, 6, 
                                                                ifelse(adult.data$age < 55, 7, 
                                                                       ifelse(adult.data$age < 60, 8, 
                                                                              ifelse(adult.data$age < 65, 9, 
                                                                                     ifelse(adult.data$age < 70, 10, 
                                                                                            ifelse(adult.data$age < 75, 11, 12)))))))))))
adult.data$education.group <- ifelse(adult.data$education < 3, adult.data$education, 
                                     ifelse(adult.data$education < 6, 3, 4))
adult.data$region <- ifelse(adult.data$prefecture < 8, 1, 
                       ifelse(adult.data$prefecture < 15, 2,  
                              ifelse(adult.data$prefecture < 25, 3, 
                                     ifelse(adult.data$prefecture < 31, 4, 
                                            ifelse(adult.data$prefecture < 36, 5, 
                                                   ifelse(adult.data$prefecture < 40, 6, 7))))))

## 母集団の周辺分布
adult.data.population.gender <- c(61841738, 65253007) / sum(c(61841738, 65253007))
adult.data.population.age <- c(8369232, 6409612, 7290878, 8316157, 9732218, 8662804, 
                               7930296, 7515246, 8455010, 9643867, 7695811, 6276856) / 
  sum(c(8369232, 6409612, 7290878, 8316157, 9732218, 8662804, 
        7930296, 7515246, 8455010, 9643867, 7695811, 6276856))
adult.data.population.education <- c(16756162, 41400268, 13187048, 17716535) / 
  sum(c(16756162, 41400268, 13187048, 17716535))
adult.data.population.region <- c(14267, 43132, 23223, 20681, 7406, 2347, 14405) / 
  sum(c(14267, 43132, 23223, 20681, 7406, 2347, 14405))

## エントロピーバランシング
adult.data.ebalance.result <- ebalance(c(rep(0, nrow(adult.data)), rep(1, nrow(adult.data))), 
                                  rbind(cbind(adult.data$gender, diag(12)[adult.data$age.group, ][, -1],
                                              diag(4)[adult.data$education.group, ][, -1], 
                                              diag(7)[adult.data$region, ][, -1]),
                                        tcrossprod(rep(1, nrow(adult.data)),
                                                   c(adult.data.population.gender[-1], adult.data.population.age[-1], 
                                                     adult.data.population.education[-1], adult.data.population.region[-1]))))
adult.data$weight <- adult.data.ebalance.result$w